{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "def3ec8b",
   "metadata": {},
   "source": [
    "### P034 使用Numpy实现线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e5c7cc75",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5baa1be5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>years</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>5250</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   years  salary\n",
       "0      1    4000\n",
       "1      2    4250\n",
       "2      3    4500\n",
       "3      4    4750\n",
       "4      5    5000\n",
       "5      6    5250"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'years': [1, 2, 3, 4, 5, 6],\n",
    "                   'salary': [4000, 4250, 4500, 4750, 5000, 5250]})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e28a46ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m = len(df)\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "eb5b90ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "X1 = df['years'].values\n",
    "Y = df['salary'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d91e71bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1],\n",
       "       [2],\n",
       "       [3],\n",
       "       [4],\n",
       "       [5],\n",
       "       [6]], dtype=int64)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X1 = X1.reshape(m, 1)\n",
    "X1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "77442d22",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bias = np.ones((m, 1))\n",
    "bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e547b595",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 1.],\n",
       "       [1., 2.],\n",
       "       [1., 3.],\n",
       "       [1., 4.],\n",
       "       [1., 5.],\n",
       "       [1., 6.]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.append(bias, X1, axis=1)\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1b39069d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 6., 21.],\n",
       "       [21., 91.]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.dot(X.T, X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "037f897c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.86666667, -0.2       ],\n",
       "       [-0.2       ,  0.05714286]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linalg.inv(np.dot(X.T, X))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "722b09c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 27750., 101500.])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.dot(X.T, Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "76eed010",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3750.,  250.])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, Y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "cfc54bf7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Linear regression: 3750.00 + 250.00x\n"
     ]
    }
   ],
   "source": [
    "coefs = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, Y))\n",
    "print(f'Linear regression: {coefs[0]:.2f} + {coefs[1]:.2f}x')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8edf5e04",
   "metadata": {},
   "source": [
    "### P035 使用Sklearn实现线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d461b8c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "251bfbb8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>years</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>4500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>4750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>5250</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   years  salary\n",
       "0      1    4000\n",
       "1      2    4250\n",
       "2      3    4500\n",
       "3      4    4750\n",
       "4      5    5000\n",
       "5      6    5250"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "365da07f",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "7c35fbe6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression()"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(df[['years']], df[['salary']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "0abf5e15",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3750.])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.intercept_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "79092a9b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[250.]])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "49df8266",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Linear regression: 3750.00 + 250.00x'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f'Linear regression: {model.intercept_[0]:.2f} + {model.coef_[0][0]:.2f}x'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7b290a9",
   "metadata": {},
   "source": [
    "### P036 读取csv实现线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "ab17df70",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "175b8077",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.136602</td>\n",
       "      <td>-13.152922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.418556</td>\n",
       "      <td>-36.316645</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.744814</td>\n",
       "      <td>104.184840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.232182</td>\n",
       "      <td>-15.822773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.489337</td>\n",
       "      <td>-24.081511</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.289094</td>\n",
       "      <td>41.156585</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.331587</td>\n",
       "      <td>101.416370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-0.529296</td>\n",
       "      <td>-24.057524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1.128785</td>\n",
       "      <td>4.933819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>-0.174600</td>\n",
       "      <td>44.850265</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   variable      target\n",
       "0 -1.136602  -13.152922\n",
       "1 -1.418556  -36.316645\n",
       "2  1.744814  104.184840\n",
       "3 -0.232182  -15.822773\n",
       "4 -0.489337  -24.081511\n",
       "5  0.289094   41.156585\n",
       "6  1.331587  101.416370\n",
       "7 -0.529296  -24.057524\n",
       "8  1.128785    4.933819\n",
       "9 -0.174600   44.850265"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('p036.csv')\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "a7eba19b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.136602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.418556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.744814</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   variable\n",
       "0 -1.136602\n",
       "1 -1.418556\n",
       "2  1.744814"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = df[['variable']]\n",
    "data.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "a11e04d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    -13.152922\n",
       "1    -36.316645\n",
       "2    104.184840\n",
       "Name: target, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target = df['target']\n",
    "target.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "351ccb1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "9cfaecc1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression()"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(data, target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ba33a9d9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5531873717999665"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.score(data, target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f29fb8da",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
